library(ggplot2)
library(vcfR)
library(adegenet)
library(adegraphics)
library(pegas)
library(StAMPP)
library(lattice)
library(gplots)
library(ape)
library(ggmap) 

Read in VCF file with

full.ery.vcf <- read.vcfR("~/Dropbox/full.ery/populations.snps.vcf") #read in all data
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 118607
##   column count: 65
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 118607
##   Character matrix gt cols: 65
##   skip: 0
##   nrows: 118607
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant: 118607
## All variants processed
head(full.ery.vcf) #check the vcf object
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.2"
## [1] "##fileDate=20190602"
## [1] "##source=\"Stacks v2.3b\""
## [1] "##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total Depth for Each Allele\">"
## [1] "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">"
## [1] "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"
## [1] "First 6 rows."
## [1] 
## [1] "***** Fixed section *****"
##      CHROM  POS      ID         REF ALT QUAL FILTER
## [1,] "chr1" "727446" "20:72:-"  "G" "A" NA   "PASS"
## [2,] "chr1" "727477" "20:41:-"  "T" "A" NA   "PASS"
## [3,] "chr1" "727563" "19:144:+" "G" "T" NA   "PASS"
## [4,] "chr1" "727631" "22:21:+"  "T" "C" NA   "PASS"
## [5,] "chr1" "727658" "22:48:+"  "T" "A" NA   "PASS"
## [6,] "chr1" "727665" "22:55:+"  "A" "G" NA   "PASS"
## [1] 
## [1] "***** Genotype section *****"
##      FORMAT           E_papuana_12155 E_papuana_12856 E_papuana_14618
## [1,] "GT:DP:AD:GQ:GL" NA              NA              NA             
## [2,] "GT:DP:AD:GQ:GL" NA              NA              NA             
## [3,] "GT:DP:AD:GQ:GL" NA              NA              NA             
## [4,] "GT:DP:AD:GQ:GL" NA              NA              NA             
## [5,] "GT:DP:AD:GQ:GL" NA              NA              NA             
## [6,] "GT:DP:AD:GQ:GL" NA              NA              NA             
##      E_papuana_14621 E_papuana_14622                  
## [1,] NA              NA                               
## [2,] NA              NA                               
## [3,] NA              NA                               
## [4,] NA              "1/1:3:0,3:20:-8.92,-1.51,-0.01" 
## [5,] NA              "0/0:3:3,0:31:-0.00,-2.54,-10.83"
## [6,] NA              "0/0:3:3,0:30:-0.00,-2.44,-10.67"
## [1] "First 6 columns only."
## [1] 
## [1] "Unique GT formats:"
## [1] "GT:DP:AD:GQ:GL"
## [1]
full.ery.vcf@fix[1:10,1:5] #check 
##       CHROM  POS      ID         REF ALT
##  [1,] "chr1" "727446" "20:72:-"  "G" "A"
##  [2,] "chr1" "727477" "20:41:-"  "T" "A"
##  [3,] "chr1" "727563" "19:144:+" "G" "T"
##  [4,] "chr1" "727631" "22:21:+"  "T" "C"
##  [5,] "chr1" "727658" "22:48:+"  "T" "A"
##  [6,] "chr1" "727665" "22:55:+"  "A" "G"
##  [7,] "chr1" "727730" "22:120:+" "G" "A"
##  [8,] "chr1" "727733" "22:123:+" "G" "A"
##  [9,] "chr1" "727742" "22:132:+" "G" "A"
## [10,] "chr1" "727821" "27:8:+"   "G" "T"
#quick check read depth distribution per individual
dp <- extract.gt(full.ery.vcf, element='DP', as.numeric=TRUE)
#pdf("DP_RAD_data.pdf", width = 10, height=3) # boxplot
par(mar=c(8,4,1,1)) 
boxplot(dp, las=3, col=c("#C0C0C0", "#808080"), ylab="Read Depth (DP)",
        las=2, cex=0.4, cex.axis=0.5)

#dev.off()
#zoom to smaller values
#pdf("DP_RAD_data_zoom.pdf", width = 10, height=3) # boxplot
par(mar=c(8,4,1,1))
boxplot(dp, las=3, col=c("#C0C0C0", "#808080"), ylab="Read Depth (DP)",
        las=2, cex=0.4, cex.axis=0.5, ylim=c(0,50))
abline(h=8, col="red")

#dev.off() 
### convert to genlight
full.ery.genlight <- vcfR2genlight(full.ery.vcf, n.cores=1)
#locNames(aa.genlight) <- paste(vcf@fix[,1],vcf@fix[,2],sep="_") # add real SNP.names
#SET NAMES HERE
# add popnames: here "population" (group) names are chars 5,6,7 of ind name 
pop(full.ery.genlight)<-substr(indNames(full.ery.genlight),3,5) 

# check the genlight
full.ery.genlight # check the basic info on the genlight object
##  /// GENLIGHT OBJECT /////////
## 
##  // 56 genotypes,  118,607 binary SNPs, size: 18.3 Mb
##  1964815 (29.58 %) missing data
## 
##  // Basic content
##    @gen: list of 56 SNPbin
## 
##  // Optional content
##    @ind.names:  56 individual labels
##    @loc.names:  118607 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @pop: population of each individual (group size range: 4-39)
##    @other: a list containing: elements without names
indNames(full.ery.genlight) # check individual names
##  [1] "E_papuana_12155"    "E_papuana_12856"    "E_papuana_14618"   
##  [4] "E_papuana_14621"    "E_papuana_14622"    "E_papuana_14624"   
##  [7] "E_papuana_16064"    "E_papuana_16434"    "E_trichroa_4702"   
## [10] "E_trichroa_4766"    "E_trichroa_4772"    "E_trichroa_4775"   
## [13] "E_trichroa_12043"   "E_trichroa_12054"   "E_trichroa_12061"  
## [16] "E_trichroa_12062"   "E_trichroa_12254"   "E_trichroa_12301"  
## [19] "E_trichroa_12846"   "E_trichroa_16071"   "E_trichroa_16075"  
## [22] "E_trichroa_16086"   "E_trichroa_16164"   "E_trichroa_16234"  
## [25] "E_trichroa_16248"   "E_trichroa_16315"   "E_trichroa_16548"  
## [28] "E_trichroa_18292"   "E_trichroa_18295"   "E_trichroa_18320"  
## [31] "E_trichroa_18327"   "E_trichroa_18389"   "E_trichroa_18401"  
## [34] "E_trichroa_27881"   "E_trichroa_27882"   "E_trichroa_23619"  
## [37] "E_trichroa_B32623"  "E_trichroa_B52739"  "E_trichroa_34978"  
## [40] "E_trichroa_35027"   "E_trichroa_32805"   "E_trichroa_32819"  
## [43] "E_trichroa_DOT-209" "E_trichroa_32834"   "E_trichroa_32797"  
## [46] "E_trichroa_32838"   "E_trichroa_32757"   "E_coloria_28439"   
## [49] "E_coloria_28562"    "E_coloria_28408"    "E_coloria_28582"   
## [52] "E_pealii_26533"     "E_pealii_22532"     "E_pealii_24275"    
## [55] "E_pealii_22533"     "E_pealii_22531"
# N missing SNPs per sample
x <- summary(t(as.matrix(full.ery.genlight)))
individs<-colnames(x)
num.missing.snps<-x[7,]
#create DF with missing snp info
reffed.missing.snps<-data.frame(individs, num.missing.snps)
rownames(reffed.missing.snps) <- NULL
reffed.missing.snps$num.missing.snps <- gsub("NA's   :", "", reffed.missing.snps$num.missing.snps)
reffed.missing.snps$num.missing.snps<- as.numeric(reffed.missing.snps$num.missing.snps)
reffed.missing.snps$miss.percentage<-(reffed.missing.snps$num.missing.snps/full.ery.genlight$n.loc)
reffed.missing.snps
##              individs num.missing.snps miss.percentage
## 1     E_papuana_12155            49853       0.4203209
## 2     E_papuana_12856            23730       0.2000725
## 3     E_papuana_14618            40805       0.3440353
## 4     E_papuana_14621            24279       0.2047012
## 5     E_papuana_14622            19323       0.1629162
## 6     E_papuana_14624            27074       0.2282665
## 7     E_papuana_16064            18578       0.1566349
## 8     E_papuana_16434            20405       0.1720387
## 9     E_trichroa_4702           114340       0.9640240
## 10    E_trichroa_4766            34807       0.2934650
## 11    E_trichroa_4772            57289       0.4830153
## 12    E_trichroa_4775            26776       0.2257540
## 13   E_trichroa_12043            36349       0.3064659
## 14   E_trichroa_12054            27842       0.2347416
## 15   E_trichroa_12061            27237       0.2296407
## 16   E_trichroa_12062            26611       0.2243628
## 17   E_trichroa_12254            27101       0.2284941
## 18   E_trichroa_12301            46557       0.3925316
## 19   E_trichroa_12846            27991       0.2359979
## 20   E_trichroa_16071            27746       0.2339322
## 21   E_trichroa_16075            61174       0.5157706
## 22   E_trichroa_16086            27241       0.2296745
## 23   E_trichroa_16164            28166       0.2374733
## 24   E_trichroa_16234            33514       0.2825634
## 25   E_trichroa_16248            32827       0.2767712
## 26   E_trichroa_16315            27214       0.2294468
## 27   E_trichroa_16548            28491       0.2402135
## 28   E_trichroa_18292            30633       0.2582731
## 29   E_trichroa_18295            35247       0.2971747
## 30   E_trichroa_18320            39314       0.3314644
## 31   E_trichroa_18327            37198       0.3136240
## 32   E_trichroa_18389            33087       0.2789633
## 33   E_trichroa_18401            33059       0.2787272
## 34   E_trichroa_27881            31942       0.2693096
## 35   E_trichroa_27882            85162       0.7180183
## 36   E_trichroa_23619            29086       0.2452300
## 37  E_trichroa_B32623            73557       0.6201742
## 38  E_trichroa_B52739            24183       0.2038918
## 39   E_trichroa_34978            25581       0.2156787
## 40   E_trichroa_35027            30743       0.2592006
## 41   E_trichroa_32805            30602       0.2580118
## 42   E_trichroa_32819            29681       0.2502466
## 43 E_trichroa_DOT-209            46619       0.3930544
## 44   E_trichroa_32834            25308       0.2133770
## 45   E_trichroa_32797            27547       0.2322544
## 46   E_trichroa_32838            24927       0.2101647
## 47   E_trichroa_32757            30639       0.2583237
## 48    E_coloria_28439            29054       0.2449602
## 49    E_coloria_28562            37883       0.3193994
## 50    E_coloria_28408            42626       0.3593886
## 51    E_coloria_28582            29101       0.2453565
## 52     E_pealii_26533            34219       0.2885074
## 53     E_pealii_22532            40336       0.3400811
## 54     E_pealii_24275            26597       0.2242448
## 55     E_pealii_22533            29463       0.2484086
## 56     E_pealii_22531            28101       0.2369253

Three samples are missing over 50% of loci, all of which are trichroa. We will drop them.

# create new genlight using this selection
#drop the three samples that are over 50% missing SNPs
filtered.full.ery <- new("genlight",
                            (as.matrix(full.ery.genlight)[c(1:8,10:34,36,38:56), ]))

indNames(filtered.full.ery) # check individual names
##  [1] "E_papuana_12155"    "E_papuana_12856"    "E_papuana_14618"   
##  [4] "E_papuana_14621"    "E_papuana_14622"    "E_papuana_14624"   
##  [7] "E_papuana_16064"    "E_papuana_16434"    "E_trichroa_4766"   
## [10] "E_trichroa_4772"    "E_trichroa_4775"    "E_trichroa_12043"  
## [13] "E_trichroa_12054"   "E_trichroa_12061"   "E_trichroa_12062"  
## [16] "E_trichroa_12254"   "E_trichroa_12301"   "E_trichroa_12846"  
## [19] "E_trichroa_16071"   "E_trichroa_16075"   "E_trichroa_16086"  
## [22] "E_trichroa_16164"   "E_trichroa_16234"   "E_trichroa_16248"  
## [25] "E_trichroa_16315"   "E_trichroa_16548"   "E_trichroa_18292"  
## [28] "E_trichroa_18295"   "E_trichroa_18320"   "E_trichroa_18327"  
## [31] "E_trichroa_18389"   "E_trichroa_18401"   "E_trichroa_27881"  
## [34] "E_trichroa_23619"   "E_trichroa_B52739"  "E_trichroa_34978"  
## [37] "E_trichroa_35027"   "E_trichroa_32805"   "E_trichroa_32819"  
## [40] "E_trichroa_DOT-209" "E_trichroa_32834"   "E_trichroa_32797"  
## [43] "E_trichroa_32838"   "E_trichroa_32757"   "E_coloria_28439"   
## [46] "E_coloria_28562"    "E_coloria_28408"    "E_coloria_28582"   
## [49] "E_pealii_26533"     "E_pealii_22532"     "E_pealii_24275"    
## [52] "E_pealii_22533"     "E_pealii_22531"

Here we make a 50% complete data matrix with 93,861 SNPs And a 90% complete data matrix with 53,426 SNPs

#50
fiftypercent.filtered.full.ery <- new("genlight", (as.matrix(filtered.full.ery))
                                             [,(colSums(is.na (as.matrix(filtered.full.ery))) < 27)])
fiftypercent.filtered.full.ery$n.loc
## [1] 93861
fiftypercent.filtered.full.ery$ind.names
##  [1] "E_papuana_12155"    "E_papuana_12856"    "E_papuana_14618"   
##  [4] "E_papuana_14621"    "E_papuana_14622"    "E_papuana_14624"   
##  [7] "E_papuana_16064"    "E_papuana_16434"    "E_trichroa_4766"   
## [10] "E_trichroa_4772"    "E_trichroa_4775"    "E_trichroa_12043"  
## [13] "E_trichroa_12054"   "E_trichroa_12061"   "E_trichroa_12062"  
## [16] "E_trichroa_12254"   "E_trichroa_12301"   "E_trichroa_12846"  
## [19] "E_trichroa_16071"   "E_trichroa_16075"   "E_trichroa_16086"  
## [22] "E_trichroa_16164"   "E_trichroa_16234"   "E_trichroa_16248"  
## [25] "E_trichroa_16315"   "E_trichroa_16548"   "E_trichroa_18292"  
## [28] "E_trichroa_18295"   "E_trichroa_18320"   "E_trichroa_18327"  
## [31] "E_trichroa_18389"   "E_trichroa_18401"   "E_trichroa_27881"  
## [34] "E_trichroa_23619"   "E_trichroa_B52739"  "E_trichroa_34978"  
## [37] "E_trichroa_35027"   "E_trichroa_32805"   "E_trichroa_32819"  
## [40] "E_trichroa_DOT-209" "E_trichroa_32834"   "E_trichroa_32797"  
## [43] "E_trichroa_32838"   "E_trichroa_32757"   "E_coloria_28439"   
## [46] "E_coloria_28562"    "E_coloria_28408"    "E_coloria_28582"   
## [49] "E_pealii_26533"     "E_pealii_22532"     "E_pealii_24275"    
## [52] "E_pealii_22533"     "E_pealii_22531"
#90
ninetypercent.filtered.full.ery <- new("genlight", (as.matrix(filtered.full.ery))
                                      [,(colSums(is.na (as.matrix(filtered.full.ery))) < 6)])
ninetypercent.filtered.full.ery$n.loc
## [1] 53426
ninetypercent.filtered.full.ery$ind.names
##  [1] "E_papuana_12155"    "E_papuana_12856"    "E_papuana_14618"   
##  [4] "E_papuana_14621"    "E_papuana_14622"    "E_papuana_14624"   
##  [7] "E_papuana_16064"    "E_papuana_16434"    "E_trichroa_4766"   
## [10] "E_trichroa_4772"    "E_trichroa_4775"    "E_trichroa_12043"  
## [13] "E_trichroa_12054"   "E_trichroa_12061"   "E_trichroa_12062"  
## [16] "E_trichroa_12254"   "E_trichroa_12301"   "E_trichroa_12846"  
## [19] "E_trichroa_16071"   "E_trichroa_16075"   "E_trichroa_16086"  
## [22] "E_trichroa_16164"   "E_trichroa_16234"   "E_trichroa_16248"  
## [25] "E_trichroa_16315"   "E_trichroa_16548"   "E_trichroa_18292"  
## [28] "E_trichroa_18295"   "E_trichroa_18320"   "E_trichroa_18327"  
## [31] "E_trichroa_18389"   "E_trichroa_18401"   "E_trichroa_27881"  
## [34] "E_trichroa_23619"   "E_trichroa_B52739"  "E_trichroa_34978"  
## [37] "E_trichroa_35027"   "E_trichroa_32805"   "E_trichroa_32819"  
## [40] "E_trichroa_DOT-209" "E_trichroa_32834"   "E_trichroa_32797"  
## [43] "E_trichroa_32838"   "E_trichroa_32757"   "E_coloria_28439"   
## [46] "E_coloria_28562"    "E_coloria_28408"    "E_coloria_28582"   
## [49] "E_pealii_26533"     "E_pealii_22532"     "E_pealii_24275"    
## [52] "E_pealii_22533"     "E_pealii_22531"
pop(ninetypercent.filtered.full.ery)<-substr(indNames(ninetypercent.filtered.full.ery),3,5) 
pop(fiftypercent.filtered.full.ery)<-substr(indNames(fiftypercent.filtered.full.ery),3,5) 

Make a PCA of all retained individuals, with both data matrixes. Only 90% complete matrix shown here for computational efficiency, but both run locally and PCAs are identical.

#pca with all snps
pca.1 <- glPca(ninetypercent.filtered.full.ery, nf=5) # retain first 300 axes (for later use in find.clusters); slow function
#pca.2 <- glPca(fiftypercent.filtered.full.ery, nf=5) # retain first 300 axes (for later use in find.clusters); slow function

#quick plot
plot(pca.1$scores[,1], pca.1$scores[,2])

#plot(pca.2$scores[,1], pca.2$scores[,2])
#pull pca scores out of df
pca.scores<-as.data.frame(pca.1$scores)
#pca.scores2<-as.data.frame(pca.2$scores)
#ggplot color by species
ggplot(pca.scores, aes(x=PC1, y=PC2, color=pop(ninetypercent.filtered.full.ery))) +
  geom_point(cex = 2)

#ggplot(pca.scores2, aes(x=PC1, y=PC2, color=pop(ninetypercent.filtered.full.ery))) +
  geom_point(cex = 2)
## geom_point: na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
#ggplot color by species
ggplot(pca.scores, aes(x=PC3, y=PC4, color=pop(ninetypercent.filtered.full.ery))) +
  geom_point(cex = 2)

Calculate Fst and Nei’s Distance between all of the species in our dataset (papuana, trichroa, coloria, pealii)

### Calculate Nei's distances between individuals/pops
# Nei's 1972 distance between indivs
ery.D.ind.90 <- stamppNeisD(ninetypercent.filtered.full.ery, pop = FALSE)
ery.D.ind.50 <- stamppNeisD(fiftypercent.filtered.full.ery, pop = FALSE)
# exportmatrix - for SplitsTree
#stamppPhylip(todi.D.ind, file="~/Desktop/Todiramphus/reffed.medfilt.todi.indiv_Neis_distance.phy.dst")
# Nei's 1972 distance between pops
ery.D.pop.90 <- stamppNeisD(ninetypercent.filtered.full.ery, pop = TRUE)
ery.D.pop.50 <- stamppNeisD(fiftypercent.filtered.full.ery, pop = TRUE)
# export
#stamppPhylip(todi.D.pop, file="~/Desktop/Todiramphus/reffed.medfilt.todi.pops_Neis_distance.phy.dst")
### Calculate pairwise Fst among populations
ninetypercent.filtered.full.ery@ploidy <- as.integer(ploidy(ninetypercent.filtered.full.ery))
fiftypercent.filtered.full.ery@ploidy <- as.integer(ploidy(fiftypercent.filtered.full.ery))
#
ery.fst.90<-stamppFst(ninetypercent.filtered.full.ery, nboots = 1, percent =95, nclusters=5)
ery.fst.50<-stamppFst(fiftypercent.filtered.full.ery, nboots = 1, percent =95, nclusters=5)
#modify the matrix for opening in SplitsTree
ery.fst.90
##            pap       tri       col pea
## pap         NA        NA        NA  NA
## tri 0.00589417        NA        NA  NA
## col 0.45864683 0.3787445        NA  NA
## pea 0.57304560 0.5200281 0.7947378  NA
ery.fst.50
##             pap       tri       col pea
## pap          NA        NA        NA  NA
## tri 0.006807597        NA        NA  NA
## col 0.439857644 0.3564122        NA  NA
## pea 0.551214028 0.4875244 0.7931585  NA
### heatmap of the indivs distance matrix
colnames(ery.D.ind.90) <- rownames(ery.D.ind.90)
colnames(ery.D.ind.50) <- rownames(ery.D.ind.50)

#pdf(file="~/Desktop/Todiramphus/medfilt.reffed.Neis_dist_heatmap.pdf", width=10, height=10)
#50% complete matrix
heatmap.2(ery.D.ind.50, trace="none", cexRow=0.4, cexCol=0.4)

#90% complete matrix
heatmap.2(ery.D.ind.90, trace="none", cexRow=0.4, cexCol=0.4)

#23619 is the Palau individual which comes out as sister to all of tri/pap
#32805 & DOT-209 are from Guadalcanal and Kolombangara, and are the only differentiated tri/pap
#dev.off() 
# plot unrooted NJ tree

#50% complete matrix
plot(nj(ery.D.ind.50), type = "unrooted", cex = .5)

#90% complete matrix
plot(nj(ery.D.ind.90), type = "unrooted", cex = .5)

50% complete and 90% complete data matrixes give identical population structure

so we now have confidence that missing data is not affecting population structure inference

we use the 90% complete matrix going forward

We will now make a subset dataset with only papuana and trichroa, and identify the trichroa by geography

#drop down to just papuana/trichroa to look closely for divergence
ninety.pap.tri.full.ery <- new("genlight",
                         (as.matrix(ninetypercent.filtered.full.ery)[c(1:44), ]))

pop(ninety.pap.tri.full.ery)<-c("pap","pap","pap","pap","pap","pap","pap","pap","png.tri","png.tri","png.tri","png.tri"
                               ,"png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri"
                               ,"png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri"
                               ,"png.tri","png.tri","png.tri","png.tri","png.tri","palau.tri","aus.tri","sols.makira.tri"
                               ,"sols.makira.tri","sols.guadalcanal.tri","sols.guadalcanal.tri","sols.kolombangara.tri","sols.tri"
                               ,"sols.guadalcanal.tri","sols.guadalcanal.tri","sols.malaita.tri")

Run PCA of only trichroa/papuana

#pca with all snps
pap.tri.pca.90 <- glPca(ninety.pap.tri.full.ery, nf=6) # retain first 300 axes (for later use in find.clusters); slow function

#pull pca scores out of df
pap.tri.pca.scores.90<-as.data.frame(pap.tri.pca.90$scores)

#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC1, y=PC2, color=pop(ninety.pap.tri.full.ery))) +
  geom_point(cex = 2)

#guadalcanal and kolombangara trichroa separate out on PC2
#Papuana, and trichroa from PNG, makira, malaita, and australia all cluster on PC 1&2

#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC3, y=PC4, color=pop(ninety.pap.tri.full.ery))) +
  geom_point(cex = 2)

#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC5, y=PC6, color=pop(ninety.pap.tri.full.ery))) +
  geom_point(cex = 2)

Run full population structure analysis and make NJ tree for just papuana/trichroa

### Calculate Nei's distances between individuals/pops
# Nei's 1972 distance between indivs
pap.tri.ery.D.ind.90 <- stamppNeisD(ninety.pap.tri.full.ery, pop = FALSE)
# exportmatrix - for SplitsTree
#stamppPhylip(todi.D.ind, file="~/Desktop/Todiramphus/reffed.medfilt.todi.indiv_Neis_distance.phy.dst")
# Nei's 1972 distance between pops
pap.tri.ery.D.pop.90 <- stamppNeisD(ninety.pap.tri.full.ery, pop = TRUE)
# export
#stamppPhylip(todi.D.pop, file="~/Desktop/Todiramphus/reffed.medfilt.todi.pops_Neis_distance.phy.dst")
### Calculate pairwise Fst among populations
ninety.pap.tri.full.ery@ploidy <- as.integer(ploidy(ninety.pap.tri.full.ery))
#
pap.tri.ery.fst.90<-stamppFst(ninety.pap.tri.full.ery, nboots = 1, percent =95, nclusters=5)
#modify the matrix for opening in SplitsTree
pap.tri.ery.fst.90
##                               pap      png.tri palau.tri    aus.tri
## pap                            NA           NA        NA         NA
## png.tri               0.007426551           NA        NA         NA
## palau.tri             0.235497051  0.208606072        NA         NA
## aus.tri               0.044974882  0.021747500       NaN         NA
## sols.makira.tri       0.010918219 -0.001794072 0.2383701 0.04339528
## sols.guadalcanal.tri  0.018612105  0.008956574 0.2179659 0.03111431
## sols.kolombangara.tri 0.069754533  0.050431976       NaN        NaN
## sols.tri              0.003267231 -0.016928971       NaN        NaN
## sols.malaita.tri      0.011999379 -0.011223495       NaN        NaN
##                       sols.makira.tri sols.guadalcanal.tri
## pap                                NA                   NA
## png.tri                            NA                   NA
## palau.tri                          NA                   NA
## aus.tri                            NA                   NA
## sols.makira.tri                    NA                   NA
## sols.guadalcanal.tri     0.0093390200                   NA
## sols.kolombangara.tri    0.0609972159          0.017726608
## sols.tri                -0.0002754384         -0.011301170
## sols.malaita.tri         0.0013947928          0.004669284
##                       sols.kolombangara.tri sols.tri sols.malaita.tri
## pap                                      NA       NA               NA
## png.tri                                  NA       NA               NA
## palau.tri                                NA       NA               NA
## aus.tri                                  NA       NA               NA
## sols.makira.tri                          NA       NA               NA
## sols.guadalcanal.tri                     NA       NA               NA
## sols.kolombangara.tri                    NA       NA               NA
## sols.tri                                NaN       NA               NA
## sols.malaita.tri                        NaN      NaN               NA
#palau trichroa ~.2 Fst with all other groups
#PNG trichroa / papuana = .007 Fst
### heatmap of the indivs distance matrix
colnames(pap.tri.ery.D.ind.90) <- rownames(pap.tri.ery.D.ind.90)

#pdf(file="~/Desktop/Todiramphus/medfilt.reffed.Neis_dist_heatmap.pdf", width=10, height=10)
heatmap.2(pap.tri.ery.D.ind.90, trace="none", cexRow=0.4, cexCol=0.4)

#23619 is the Palau individual which comes out as sister to all of tri/pap
#32805 & DOT-209 are from Guadalcanal and Kolombangara, and are the only differentiated tri/pap
#dev.off() 
# plot unrooted NJ tree
plot(nj(pap.tri.ery.D.ind.90), type = "unrooted", cex = .5)

The only individual that is different enough to be separated in the NJ tree is the Palau bird

Safe to say the pattern of no detectable divergence between E. papuana and E. trichroa is consistent here with 8 papuana and 25 PNG trichroa. Excluding E. trichroa pelewensis, we find no structure across 52 trichroa/papuana individuals in over 50,000 loci.

read in stacks populations Fst calculation between papuana and trichroa from PNG

full.ery.pap.ref.fst<-read.delim(file = "~/Dropbox/full.ery/populations.fst_papuana-trichroa.tsv")
mean(full.ery.pap.ref.fst$AMOVA.Fst)
## [1] 0.02466679
#avg. Fst = .025

clean chromosomes up

#combine unmapped chromosomes
levels(full.ery.pap.ref.fst$Chr)
##   [1] "chr1"                      "chr1_ABQF01066038_random" 
##   [3] "chr1_ABQF01112931_random"  "chr1_ABQF01116611_random" 
##   [5] "chr1_EQ832370_random"      "chr1_EQ832384_random"     
##   [7] "chr1_EQ832422_random"      "chr10"                    
##   [9] "chr10_ABQF01085055_random" "chr10_EQ832874_random"    
##  [11] "chr10_EQ832893_random"     "chr11_EQ832924_random"    
##  [13] "chr12_EQ832950_random"     "chr12_KB442361_random"    
##  [15] "chr13"                     "chr13_EQ832957_random"    
##  [17] "chr13_EQ832959_random"     "chr13_EQ832971_random"    
##  [19] "chr13_EQ832995_random"     "chr14_EQ833009_random"    
##  [21] "chr15_EQ833041_random"     "chr17_EQ833050_random"    
##  [23] "chr18_KB442395_random"     "chr1A"                    
##  [25] "chr1A_ABQF01112188_random" "chr1A_EQ832472_random"    
##  [27] "chr2"                      "chr2_ABQF01062723_random" 
##  [29] "chr2_ABQF01082098_random"  "chr2_ABQF01114633_random" 
##  [31] "chr2_EQ832482_random"      "chr2_EQ832516_random"     
##  [33] "chr2_EQ832538_random"      "chr2_EQ832546_random"     
##  [35] "chr2_EQ832581_random"      "chr2_KB442204_random"     
##  [37] "chr2_KB442221_random"      "chr2_KB442223_random"     
##  [39] "chr2_KB442240_random"      "chr20"                    
##  [41] "chr21"                     "chr21_EQ833135_random"    
##  [43] "chr21_EQ833162_random"     "chr22_EQ833189_random"    
##  [45] "chr24"                     "chr26"                    
##  [47] "chr26_EQ833310_random"     "chr27"                    
##  [49] "chr27_EQ833329_random"     "chr3_EQ832612_random"     
##  [51] "chr3_EQ832614_random"      "chr3_EQ832615_random"     
##  [53] "chr4"                      "chr4_ABQF01084713_random" 
##  [55] "chr4_EQ832640_random"      "chr4_EQ832641_random"     
##  [57] "chr4_EQ832678_random"      "chr4_KB442255_random"     
##  [59] "chr4A"                     "chr4A_EQ832701_random"    
##  [61] "chr5"                      "chr5_EQ832721_random"     
##  [63] "chr5_EQ832723_random"      "chr5_KB442299_random"     
##  [65] "chr6"                      "chr6_ABQF01081686_random" 
##  [67] "chr6_EQ832758_random"      "chr6_EQ832760_random"     
##  [69] "chr7"                      "chr7_EQ832789_random"     
##  [71] "chr7_EQ832795_random"      "chr7_EQ832809_random"     
##  [73] "chr8_EQ832817_random"      "chr8_EQ832818_random"     
##  [75] "chr8_EQ832819_random"      "chr8_EQ832820_random"     
##  [77] "chr9"                      "chr9_EQ832848_random"     
##  [79] "chr9_EQ832871_random"      "chrM"                     
##  [81] "chrUn_ABQF01069968"        "chrUn_ABQF01072490"       
##  [83] "chrUn_ABQF01072775"        "chrUn_ABQF01074087"       
##  [85] "chrUn_ABQF01074862"        "chrUn_ABQF01076994"       
##  [87] "chrUn_ABQF01082363"        "chrUn_ABQF01083155"       
##  [89] "chrUn_ABQF01084312"        "chrUn_ABQF01084477"       
##  [91] "chrUn_ABQF01085047"        "chrUn_ABQF01085632"       
##  [93] "chrUn_ABQF01085690"        "chrUn_ABQF01086385"       
##  [95] "chrUn_ABQF01087664"        "chrUn_ABQF01088159"       
##  [97] "chrUn_ABQF01088542"        "chrUn_ABQF01088767"       
##  [99] "chrUn_ABQF01089288"        "chrUn_ABQF01089330"       
## [101] "chrUn_ABQF01090168"        "chrUn_ABQF01090310"       
## [103] "chrUn_ABQF01090506"        "chrUn_ABQF01090592"       
## [105] "chrUn_ABQF01090636"        "chrUn_ABQF01090689"       
## [107] "chrUn_ABQF01090728"        "chrUn_ABQF01091169"       
## [109] "chrUn_ABQF01091951"        "chrUn_ABQF01092022"       
## [111] "chrUn_ABQF01092886"        "chrUn_ABQF01093283"       
## [113] "chrUn_ABQF01093314"        "chrUn_ABQF01093495"       
## [115] "chrUn_ABQF01093518"        "chrUn_ABQF01093969"       
## [117] "chrUn_ABQF01094092"        "chrUn_ABQF01094352"       
## [119] "chrUn_ABQF01094483"        "chrUn_ABQF01094551"       
## [121] "chrUn_ABQF01094838"        "chrUn_ABQF01094949"       
## [123] "chrUn_ABQF01095309"        "chrUn_ABQF01095342"       
## [125] "chrUn_ABQF01095695"        "chrUn_ABQF01096525"       
## [127] "chrUn_ABQF01096564"        "chrUn_ABQF01096760"       
## [129] "chrUn_ABQF01097085"        "chrUn_ABQF01097702"       
## [131] "chrUn_ABQF01097967"        "chrUn_ABQF01098234"       
## [133] "chrUn_ABQF01098334"        "chrUn_ABQF01098429"       
## [135] "chrUn_ABQF01098525"        "chrUn_ABQF01099770"       
## [137] "chrUn_ABQF01099842"        "chrUn_ABQF01100862"       
## [139] "chrUn_ABQF01101547"        "chrUn_ABQF01101833"       
## [141] "chrUn_ABQF01101896"        "chrUn_ABQF01101953"       
## [143] "chrUn_ABQF01102061"        "chrUn_ABQF01102198"       
## [145] "chrUn_ABQF01102600"        "chrUn_ABQF01102862"       
## [147] "chrUn_ABQF01102942"        "chrUn_ABQF01103002"       
## [149] "chrUn_ABQF01103318"        "chrUn_ABQF01103343"       
## [151] "chrUn_ABQF01103484"        "chrUn_ABQF01103548"       
## [153] "chrUn_ABQF01103624"        "chrUn_ABQF01103699"       
## [155] "chrUn_ABQF01103748"        "chrUn_ABQF01104522"       
## [157] "chrUn_ABQF01104971"        "chrUn_ABQF01105060"       
## [159] "chrUn_ABQF01105458"        "chrUn_ABQF01105601"       
## [161] "chrUn_ABQF01106033"        "chrUn_ABQF01106178"       
## [163] "chrUn_ABQF01106696"        "chrUn_ABQF01107089"       
## [165] "chrUn_ABQF01107284"        "chrUn_ABQF01107436"       
## [167] "chrUn_ABQF01107567"        "chrUn_ABQF01107768"       
## [169] "chrUn_ABQF01108003"        "chrUn_ABQF01108366"       
## [171] "chrUn_ABQF01108531"        "chrUn_ABQF01108733"       
## [173] "chrUn_ABQF01109034"        "chrUn_ABQF01109100"       
## [175] "chrUn_ABQF01109472"        "chrUn_ABQF01109688"       
## [177] "chrUn_ABQF01109824"        "chrUn_ABQF01109867"       
## [179] "chrUn_ABQF01110235"        "chrUn_ABQF01110524"       
## [181] "chrUn_ABQF01111109"        "chrUn_ABQF01111384"       
## [183] "chrUn_ABQF01111411"        "chrUn_ABQF01111513"       
## [185] "chrUn_ABQF01111613"        "chrUn_ABQF01112217"       
## [187] "chrUn_ABQF01112234"        "chrUn_ABQF01112450"       
## [189] "chrUn_ABQF01113479"        "chrUn_ABQF01113531"       
## [191] "chrUn_ABQF01113863"        "chrUn_ABQF01114204"       
## [193] "chrUn_ABQF01114630"        "chrUn_ABQF01114934"       
## [195] "chrUn_ABQF01115236"        "chrUn_ABQF01115605"       
## [197] "chrUn_ABQF01115962"        "chrUn_ABQF01116227"       
## [199] "chrUn_ABQF01116908"        "chrUn_ABQF01117067"       
## [201] "chrUn_ABQF01117117"        "chrUn_ABQF01117267"       
## [203] "chrUn_ABQF01117614"        "chrUn_ABQF01117766"       
## [205] "chrUn_ABQF01117884"        "chrUn_ABQF01117920"       
## [207] "chrUn_ABQF01118092"        "chrUn_ABQF01118500"       
## [209] "chrUn_ABQF01118603"        "chrUn_ABQF01118704"       
## [211] "chrUn_ABQF01118801"        "chrUn_ABQF01119183"       
## [213] "chrUn_ABQF01119240"        "chrUn_ABQF01119696"       
## [215] "chrUn_ABQF01119809"        "chrUn_ABQF01120003"       
## [217] "chrUn_ABQF01120299"        "chrUn_ABQF01120431"       
## [219] "chrUn_ABQF01121044"        "chrUn_ABQF01121238"       
## [221] "chrUn_ABQF01122122"        "chrUn_ABQF01122402"       
## [223] "chrUn_ABQF01122426"        "chrUn_ABQF01122670"       
## [225] "chrUn_ABQF01123539"        "chrUn_EQ833462"           
## [227] "chrUn_EQ833563"            "chrUn_EQ833616"           
## [229] "chrUn_EQ833779"            "chrUn_EQ833785"           
## [231] "chrUn_EQ833889"            "chrUn_EQ834090"           
## [233] "chrUn_EQ834133"            "chrUn_EQ834330"           
## [235] "chrUn_EQ834351"            "chrUn_EQ834463"           
## [237] "chrUn_EQ834643"            "chrUn_EQ834689"           
## [239] "chrUn_EQ834704"            "chrUn_EQ834832"           
## [241] "chrUn_EQ835287"            "chrUn_EQ835460"           
## [243] "chrUn_EQ835489"            "chrUn_EQ835669"           
## [245] "chrUn_EQ835754"            "chrUn_EQ835845"           
## [247] "chrUn_EQ836162"            "chrUn_EQ836214"           
## [249] "chrUn_EQ836353"            "chrUn_EQ836495"           
## [251] "chrUn_EQ836545"            "chrUn_EQ836621"           
## [253] "chrUn_EQ836880"            "chrUn_EQ836890"           
## [255] "chrUn_EQ837681"            "chrUn_EQ837710"           
## [257] "chrUn_EQ837776"            "chrUn_EQ837931"           
## [259] "chrUn_EQ837939"            "chrUn_EQ837945"           
## [261] "chrUn_EQ837975"            "chrUn_EQ838175"           
## [263] "chrUn_EQ838367"            "chrUn_EQ838374"           
## [265] "chrUn_EQ838408"            "chrUn_EQ838538"           
## [267] "chrUn_EQ838581"            "chrUn_EQ838645"           
## [269] "chrUn_EQ838780"            "chrUn_EQ838860"           
## [271] "chrUn_EQ838978"            "chrUn_EQ839256"           
## [273] "chrUn_EQ839375"            "chrUn_EQ839565"           
## [275] "chrUn_EQ839602"            "chrUn_EQ839651"           
## [277] "chrUn_EQ839674"            "chrUn_EQ839893"           
## [279] "chrUn_EQ839908"            "chrUn_EQ840164"           
## [281] "chrUn_EQ840277"            "chrUn_EQ840364"           
## [283] "chrUn_EQ840620"            "chrUn_EQ840814"           
## [285] "chrUn_EQ841032"            "chrUn_EQ841097"           
## [287] "chrUn_EQ841536"            "chrUn_EQ841545"           
## [289] "chrUn_EQ841641"            "chrUn_EQ841849"           
## [291] "chrUn_EQ841867"            "chrUn_EQ842167"           
## [293] "chrUn_EQ842220"            "chrUn_EQ842299"           
## [295] "chrUn_EQ842346"            "chrUn_EQ842415"           
## [297] "chrUn_EQ842693"            "chrUn_EQ842723"           
## [299] "chrUn_EQ843139"            "chrUn_EQ843173"           
## [301] "chrUn_EQ843182"            "chrUn_EQ843251"           
## [303] "chrUn_EQ843287"            "chrUn_EQ843421"           
## [305] "chrUn_EQ843562"            "chrUn_EQ843614"           
## [307] "chrUn_EQ843909"            "chrUn_EQ843932"           
## [309] "chrUn_EQ843953"            "chrUn_EQ844182"           
## [311] "chrUn_EQ844202"            "chrUn_EQ844241"           
## [313] "chrUn_EQ844273"            "chrUn_EQ844683"           
## [315] "chrUn_EQ844763"            "chrUn_EQ844793"           
## [317] "chrUn_EQ844849"            "chrUn_EQ844879"           
## [319] "chrUn_EQ844892"            "chrUn_EQ844976"           
## [321] "chrUn_EQ844985"            "chrUn_EQ845273"           
## [323] "chrUn_EQ845365"            "chrUn_EQ845449"           
## [325] "chrUn_EQ845527"            "chrUn_EQ845546"           
## [327] "chrUn_EQ845853"            "chrUn_EQ845860"           
## [329] "chrUn_EQ845884"            "chrUn_EQ845902"           
## [331] "chrUn_EQ846095"            "chrUn_EQ846180"           
## [333] "chrZ_EQ833352_random"      "chrZ_EQ833358_random"     
## [335] "chrZ_EQ833367_random"
full.ery.pap.ref.fst$Chr<-gsub("chrUn_.*", "unk", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
##  [1] "chr1"                      "chr1_ABQF01066038_random" 
##  [3] "chr1_ABQF01112931_random"  "chr1_ABQF01116611_random" 
##  [5] "chr1_EQ832370_random"      "chr1_EQ832384_random"     
##  [7] "chr1_EQ832422_random"      "chr10"                    
##  [9] "chr10_ABQF01085055_random" "chr10_EQ832874_random"    
## [11] "chr10_EQ832893_random"     "chr11_EQ832924_random"    
## [13] "chr12_EQ832950_random"     "chr12_KB442361_random"    
## [15] "chr13"                     "chr13_EQ832957_random"    
## [17] "chr13_EQ832959_random"     "chr13_EQ832971_random"    
## [19] "chr13_EQ832995_random"     "chr14_EQ833009_random"    
## [21] "chr15_EQ833041_random"     "chr17_EQ833050_random"    
## [23] "chr18_KB442395_random"     "chr1A"                    
## [25] "chr1A_ABQF01112188_random" "chr1A_EQ832472_random"    
## [27] "chr2"                      "chr2_ABQF01062723_random" 
## [29] "chr2_ABQF01082098_random"  "chr2_ABQF01114633_random" 
## [31] "chr2_EQ832482_random"      "chr2_EQ832516_random"     
## [33] "chr2_EQ832538_random"      "chr2_EQ832546_random"     
## [35] "chr2_EQ832581_random"      "chr2_KB442204_random"     
## [37] "chr2_KB442221_random"      "chr2_KB442223_random"     
## [39] "chr2_KB442240_random"      "chr20"                    
## [41] "chr21"                     "chr21_EQ833135_random"    
## [43] "chr21_EQ833162_random"     "chr22_EQ833189_random"    
## [45] "chr24"                     "chr26"                    
## [47] "chr26_EQ833310_random"     "chr27"                    
## [49] "chr27_EQ833329_random"     "chr3_EQ832612_random"     
## [51] "chr3_EQ832614_random"      "chr3_EQ832615_random"     
## [53] "chr4"                      "chr4_ABQF01084713_random" 
## [55] "chr4_EQ832640_random"      "chr4_EQ832641_random"     
## [57] "chr4_EQ832678_random"      "chr4_KB442255_random"     
## [59] "chr4A"                     "chr4A_EQ832701_random"    
## [61] "chr5"                      "chr5_EQ832721_random"     
## [63] "chr5_EQ832723_random"      "chr5_KB442299_random"     
## [65] "chr6"                      "chr6_ABQF01081686_random" 
## [67] "chr6_EQ832758_random"      "chr6_EQ832760_random"     
## [69] "chr7"                      "chr7_EQ832789_random"     
## [71] "chr7_EQ832795_random"      "chr7_EQ832809_random"     
## [73] "chr8_EQ832817_random"      "chr8_EQ832818_random"     
## [75] "chr8_EQ832819_random"      "chr8_EQ832820_random"     
## [77] "chr9"                      "chr9_EQ832848_random"     
## [79] "chr9_EQ832871_random"      "chrM"                     
## [81] "chrZ_EQ833352_random"      "chrZ_EQ833358_random"     
## [83] "chrZ_EQ833367_random"      "unk"
#
full.ery.pap.ref.fst$Chr<-gsub(".*_random", "unk", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
##  [1] "chr1"  "chr10" "chr13" "chr1A" "chr2"  "chr20" "chr21" "chr24"
##  [9] "chr26" "chr27" "chr4"  "chr4A" "chr5"  "chr6"  "chr7"  "chr9" 
## [17] "chrM"  "unk"
full.ery.pap.ref.fst$Chr<-gsub("chrM", "M", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr1", 1, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr1A", "1A", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr2", 2, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr4", 4, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr4A", "4A", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr5", 5, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr6", 6, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr7", 7, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr9", 9, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr10", 10, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr13", 13, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr20", 20, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr21", 21, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr24", 24, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr26", 26, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr27", 27, full.ery.pap.ref.fst$Chr)



full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
##  [1] "1"   "10"  "13"  "1A"  "2"   "20"  "21"  "24"  "26"  "27"  "4"  
## [12] "4A"  "5"   "6"   "7"   "9"   "M"   "unk"

Plot Fst, nothing above .75, which means we likely aren’t in the controlling region

ggplot(data = full.ery.pap.ref.fst) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))

subset outlier snps and show chromosomal position

#subset outlier snps
full.elevated.snps<-full.ery.pap.ref.fst[full.ery.pap.ref.fst$AMOVA.Fst > .5,]
full.elevated.snps<-droplevels(full.elevated.snps)
rownames(full.elevated.snps)<-c()

full.elevated.snps[,1:8]
##   X..Locus.ID Pop.1.ID Pop.2.ID Chr        BP Column Overall.Pi AMOVA.Fst
## 1       26420  papuana trichroa   1  84482164     84    0.41558   0.53333
## 2       60006  papuana trichroa  1A   5190939     15    0.26154   0.52941
## 3      126092  papuana trichroa   2 150135036     15    0.37166   0.54391
## 4      141016  papuana trichroa unk  31437307    141    0.46271   0.55170
## 5      164657  papuana trichroa unk  93297358      7    0.24603   0.56452
## 6      193526  papuana trichroa  4A  14282704     42    0.48077   0.55556
## 7      193526  papuana trichroa  4A  14282799    137    0.40897   0.65308
## 8      225414  papuana trichroa   7  21914079     80    0.38571   0.59524
## 9      253839  papuana trichroa unk  19316709     98    0.30444   0.59259

more chrom cleaning

#
Chr1<-subset(full.ery.pap.ref.fst,Chr == 1)
Chr1A<-subset(full.ery.pap.ref.fst,Chr == "1A")
Chr2<-subset(full.ery.pap.ref.fst,Chr == 2)
Chr4<-subset(full.ery.pap.ref.fst,Chr == 4)
Chr4A<-subset(full.ery.pap.ref.fst,Chr == "4A")
Chr5<-subset(full.ery.pap.ref.fst,Chr == 5)
Chr6<-subset(full.ery.pap.ref.fst,Chr == 6)
Chr7<-subset(full.ery.pap.ref.fst,Chr == 7)
Chr9<-subset(full.ery.pap.ref.fst,Chr == 9)
Chr10<-subset(full.ery.pap.ref.fst,Chr == 10)
Chr13<-subset(full.ery.pap.ref.fst,Chr == 13)
Chr20<-subset(full.ery.pap.ref.fst,Chr == 20)
Chr21<-subset(full.ery.pap.ref.fst,Chr == 21)
Chr24<-subset(full.ery.pap.ref.fst,Chr == 24)
Chr26<-subset(full.ery.pap.ref.fst,Chr == 26)
Chr27<-subset(full.ery.pap.ref.fst,Chr == 27)
ChrMT<-subset(full.ery.pap.ref.fst,Chr == "M")
Chrunk<-subset(full.ery.pap.ref.fst,Chr == "unk")

full.ery.chr.table<-as.data.frame(table(full.ery.pap.ref.fst$Chr))
names(full.ery.chr.table) <- c("chromosome", "snps")
full.ery.chr.table<-full.ery.chr.table[c(1,5,11,12,13,14,15,16,2,3,6,7,8,9,10,17,18),]
full.ery.chr.table$chromosome
##  [1] 1   2   4   4A  5   6   7   9   10  13  20  21  24  26  27  M   unk
## Levels: 1 10 13 1A 2 20 21 24 26 27 4 4A 5 6 7 9 M unk
full.ery.chr.table$length<- c(118548696, 156412533, 69780378, 20704505, 62374962,
                         36305782, 39844632, 27241186, 20806668, 16962381, 15652063, 5979137, 8021379,
                         4907541, 4618897, 16853, 174341365)
full.ery.chr.table$fst<-c(mean(Chr1$AMOVA.Fst),mean(Chr2$AMOVA.Fst),
                 mean(Chr4$AMOVA.Fst),mean(Chr4A$AMOVA.Fst),mean(Chr5$AMOVA.Fst),mean(Chr6$AMOVA.Fst),
                 mean(Chr7$AMOVA.Fst),mean(Chr9$AMOVA.Fst),mean(Chr10$AMOVA.Fst),
                 mean(Chr13$AMOVA.Fst),mean(Chr20$AMOVA.Fst),mean(Chr21$AMOVA.Fst),
                 mean(Chr24$AMOVA.Fst),mean(Chr26$AMOVA.Fst),mean(Chr27$AMOVA.Fst),
                 mean(ChrMT$AMOVA.Fst),mean(Chrunk$AMOVA.Fst))

print table with chromosomal information

full.ery.chr.table
##    chromosome  snps    length        fst
## 1           1  9943 118548696 0.02394110
## 5           2 13695 156412533 0.02370993
## 11          4  6250  69780378 0.02474484
## 12         4A   570  20704505 0.03416860
## 13          5  4375  62374962 0.02358386
## 14          6  1980  36305782 0.02298070
## 15          7  2516  39844632 0.02442485
## 16          9   880  27241186 0.02368495
## 2          10   620  20806668 0.02190340
## 3          13   565  16962381 0.02365273
## 6          20   349  15652063 0.02225877
## 7          21   100   5979137 0.03197300
## 8          24     8   8021379 0.02624625
## 9          26    27   4907541 0.02402037
## 10         27    17   4618897 0.04455471
## 17          M    14     16853 0.01287714
## 18        unk 22225 174341365 0.02588512

Print plot of chromosome length vs SNP # then print plot of avg Fst per chromosome, with SNP # above each bar

ggplot(data = full.ery.chr.table, mapping = aes(x = length, y = snps, color = chromosome))+
  geom_point() + ggtitle("snp # vs. chromosome length")

xx <- barplot(full.ery.chr.table$fst, xaxt = 'n', xlab = '', width = 0.85, ylim = c(0,.2),
              ylab = "Avg. Fst")
## Add text at top of bars
text(x = xx, y = full.ery.chr.table$fst, label = full.ery.chr.table$snps, pos = 3, cex = 0.8, col = "red")
## Add x-axis labels 
axis(1, at=xx, labels=full.ery.chr.table$chromosome, tick=FALSE, las=1, line=-0.5, cex.axis=0.5)

Show plots of different ways of visualizing Fst

ggplot(data = full.ery.pap.ref.fst) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))

ggplot(data = full.elevated.snps) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))

ggplot(data = full.ery.pap.ref.fst) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = LOD, color = Chr))

ggplot(data = full.ery.pap.ref.fst) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = Corrected.AMOVA.Fst, color = Chr))+ 
  theme(legend.position="none")

ggplot(data = full.ery.pap.ref.fst) + 
  geom_point(mapping = aes(x = X..Locus.ID, y = Smoothed.AMOVA.Fst, color = Chr))+ 
  theme(legend.position="none")

the 4A region is in a non-coding region, just downstream of gene: testis specific serine kinase 2 (TSSK2)

unlikely to actually be controlling the region, seedcracker paper set the cutoff for “divergent SNPs” at .8 Fst

looks unlikely that any of our rad loci are in controlling region

#read in admix files K 3-6
admix3<-t(as.matrix(read.table("~/Dropbox/full.ery/admix3.outfiles.qopt")))
barplot(admix3,col=1:3,space=0,xlab="Individuals",ylab="admixture")

indivs<-c("coloria_28408","coloria_28439","coloria_28562","coloria_28582",
"papuana_12155","papuana_12856","papuana_14618","papuana_14621","papuana_14622",
"papuana_14624","papuana_16064","papuana_16434","pealii_22531","pealii_22532",
"pealii_22533","pealii_24275","pealii_26533","trichroa_12043","trichroa_12054",
"trichroa_12061","trichroa_12062","trichroa_12254","trichroa_12301","trichroa_12846",
"trichroa_16071","trichroa_16075","trichroa_16086","trichroa_16164","trichroa_16234",
"trichroa_16248","trichroa_16315","trichroa_16548","trichroa_18292","trichroa_18295",
"trichroa_18320","trichroa_18327","trichroa_18389","trichroa_18401","trichroa_23619",
"trichroa_27881","trichroa_27882","trichroa_32757","trichroa_32797","trichroa_32805",
"trichroa_32819","trichroa_32834","trichroa_32838","trichroa_34978","trichroa_35027",
"trichroa_4702","trichroa_4766","trichroa_4772","trichroa_4775","trichroa_B32623",
"trichroa_B52739","trichroa_DOT-209")
barplot(admix3,col=1:3,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)

#4
admix4<-t(as.matrix(read.table("~/Dropbox/full.ery/admix4.outfiles.qopt")))
barplot(admix4,col=1:4,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)

#5
admix5<-t(as.matrix(read.table("~/Dropbox/full.ery/admix5.outfiles.qopt")))
barplot(admix5,col=1:5,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)

#6
admix6<-t(as.matrix(read.table("~/Dropbox/full.ery/admix6.outfiles.qopt")))
barplot(admix6,col=1:6,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)

Weird result from NGSadmix at K=3, should be three clean groups… but all K values makes it clear that papuana is not a distinct clade from trichroa.